library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
bigquery_mutation_stat = read.csv("/Users/qqin/Documents/23Q4_all_samples_stat.csv")
p <- ggboxplot(melt(bigquery_mutation_stat[, c("gnomad_23Q4", "popaf_23q2_standard")], variable.name='release_and_database'),
x = "release_and_database", y = "value",
color = "release_and_database", palette =c("#00AFBB", "#FC4E07"),
add = "jitter", shape = "release_and_database") + ylab('Frequency')
## No id variables; using all as measure variables
my_comparisons <- list(c('gnomad_23Q4', "popaf_23q2_standard"))
print(p + stat_compare_means(comparisons = my_comparisons, method = "t.test") + ylab("Somatic variants after gnomAD filter") + xlab(""))
oncoplot(test_23Q4_ccle_maf, genes=names(sort(table(test_23Q4_ccle_maf@data[grepl('^Gain', test_23Q4_ccle_maf@data$oncokb_effect), 'Hugo_Symbol']), decreasing=T)[1:20]), top = 20, gene_mar=8)
oncoplot(test_23Q4_ccle_maf, genes=names(table(test_23Q4_ccle_maf@data[grepl('^Y', test_23Q4_ccle_maf@data$hess_driver) | (test_23Q4_ccle_maf@data$oncokb_hotspot=='Y'), 'Hugo_Symbol'])[1:20]), top = 20)
After filtering by the internal cohort af \(af < 0.05\).
## Warning in FUN(X[[i]], ...): Removed 0 samples with zero mutations.
## Warning in FUN(X[[i]], ...): Removed 0 samples with zero mutations.
## Warning in FUN(X[[i]], ...): Removed 0 samples with zero mutations.
## Warning: Removed 18 rows containing missing values (`position_stack()`).
## Warning: Removed 54 rows containing missing values (`position_stack()`).
mike_biomarkers = read.csv("~/Downloads/bench_corrs_for_mike_new (1).csv")
mike_biomarkers_mutation = subset(mike_biomarkers, feat_type!='CN' & feat_type!='GE')
mike_biomarkers_mutation_nona = mike_biomarkers_mutation[!is.na(mike_biomarkers_mutation$cor),]
mike_biomarkers_mutation_nona = subset(mike_biomarkers_mutation_nona, model=='CERES_CRISPR')
res = list()
for (row_index in 1:nrow(mike_biomarkers_mutation_nona)) {
feat = mike_biomarkers_mutation_nona[row_index, 'Feat_gene']
dep = mike_biomarkers_mutation_nona[row_index, 'Dep_Gene']
res[[paste0(feat, "_", dep)]] = tryCatch({
model=plot_dependency(feat, dep, any_mutation=T, plot=F)
c(model[1], model[2], model[3], model[4])
}, error=function(e) c(NA, NA, NA, NA))
}
## [1] "ARID1B..57492."
## NULL
##
## ARID1A Non
## 169 1036
## CRISPR LoF
## 1 0.02086464 ARID1A
## 2 -0.77398300 ARID1A
## 3 -0.06621979 ARID1A
## 4 -1.80534223 ARID1A
## 5 -0.61172793 ARID1A
## 6 -0.47491534 ARID1A
## [1] "BRAF..673."
## NULL
##
## BRAF Non
## 127 1078
## CRISPR LoF
## 1 -0.09305552 BRAF
## 2 -1.81956333 BRAF
## 3 -0.73334082 BRAF
## 4 -1.32623136 BRAF
## 5 -0.79960230 BRAF
## 6 -1.31084344 BRAF
## [1] "EGF..1950."
## NULL
##
## EGFR Non
## 73 1132
## CRISPR LoF
## 1 0.139012119 EGFR
## 2 -0.007850668 EGFR
## 3 0.071310771 EGFR
## 4 -0.070910030 EGFR
## 5 -0.063671598 EGFR
## 6 0.131968428 EGFR
## [1] "KRAS..3845."
## NULL
##
## KRAS Non
## 197 1008
## CRISPR LoF
## 1 -2.475868 KRAS
## 2 -2.246003 KRAS
## 3 -1.845185 KRAS
## 4 -3.137764 KRAS
## 5 -2.391494 KRAS
## 6 -0.827603 KRAS
## [1] "NRAS..4893."
## NULL
##
## Non NRAS
## 1138 67
## CRISPR LoF
## 1 -0.0975696 NRAS
## 2 -1.7831998 NRAS
## 3 -1.6068707 NRAS
## 4 -1.0833982 NRAS
## 5 -1.2798583 NRAS
## 6 -1.3879226 NRAS
## [1] "MTOR..2475."
## NULL
##
## Non PIK3CA
## 1062 143
## CRISPR LoF
## 1 -0.8464532 PIK3CA
## 2 -1.1011345 PIK3CA
## 3 -0.8463668 PIK3CA
## 4 -1.1620927 PIK3CA
## 5 -1.4386328 PIK3CA
## 6 -1.0168104 PIK3CA
## [1] "MDM2..4193."
## NULL
##
## Non TP53
## 386 819
## CRISPR LoF
## 1 -0.31832140 TP53
## 2 -0.03748236 TP53
## 3 -0.14197657 TP53
## 4 -1.62768735 TP53
## 5 -0.25559085 TP53
## 6 -0.44362467 TP53
## [1] "CTNNB1..1499."
## NULL
##
## APC Non
## 142 1063
## CRISPR LoF
## 1 -0.9788010 APC
## 2 -1.3508856 APC
## 3 -0.4138809 APC
## 4 -0.2129251 APC
## 5 -0.9388554 APC
## 6 -0.2255311 APC
## [1] "CTNNB1..1499."
## NULL
##
## CTNNB1 Non
## 61 1144
## CRISPR LoF
## 1 -1.024287 CTNNB1
## 2 -0.978801 CTNNB1
## 3 -1.076761 CTNNB1
## 4 -1.363486 CTNNB1
## 5 -1.360818 CTNNB1
## 6 -1.708011 CTNNB1
## [1] "ERBB2..2064."
## NULL
##
## ERBB2 Non
## 47 1158
## CRISPR LoF
## 1 -0.1619255 ERBB2
## 2 -0.3871655 ERBB2
## 3 -0.2942801 ERBB2
## 4 -0.3821129 ERBB2
## 5 -0.2863837 ERBB2
## 6 -1.4108343 ERBB2
## [1] "ERBB4..2066."
## NULL
##
## ERBB4 Non
## 84 1121
## CRISPR LoF
## 1 -0.21059930 ERBB4
## 2 -0.15385767 ERBB4
## 3 -0.04447189 ERBB4
## 4 -0.24311890 ERBB4
## 5 -0.18806732 ERBB4
## 6 -0.10381109 ERBB4
## [1] "EZH2..2146."
## NULL
##
## EZH2 Non
## 39 1166
## CRISPR LoF
## 1 -0.1434961 EZH2
## 2 -0.1285251 EZH2
## 3 -1.5275773 EZH2
## 4 -1.0537890 EZH2
## 5 -0.7269203 EZH2
## 6 -0.9224066 EZH2
## [1] "FLT3..2322."
## NULL
##
## FLT3 Non
## 54 1151
## CRISPR LoF
## 1 0.053693382 FLT3
## 2 -1.113222142 FLT3
## 3 0.003950539 FLT3
## 4 -1.990717193 FLT3
## 5 -0.024649067 FLT3
## 6 0.045250929 FLT3
## [1] "HRAS..3265."
## NULL
##
## HRAS Non
## 20 1185
## CRISPR LoF
## 1 -1.29103793 HRAS
## 2 0.04029183 HRAS
## 3 -1.80538658 HRAS
## 4 -0.56986289 HRAS
## 5 -0.14533754 HRAS
## 6 -0.27036669 HRAS
## [1] "IDH1..3417."
## NULL
##
## IDH1 Non
## 18 1187
## CRISPR LoF
## 1 -0.379589070 IDH1
## 2 -0.109113210 IDH1
## 3 -0.001456304 IDH1
## 4 0.035921015 IDH1
## 5 -0.200207562 IDH1
## 6 0.130237240 IDH1
## [1] "KIT..3815."
## NULL
##
## KIT Non
## 47 1158
## CRISPR LoF
## 1 -0.11250366 KIT
## 2 -0.02121563 KIT
## 3 -0.11054184 KIT
## 4 -0.85855100 KIT
## 5 -0.14079311 KIT
## 6 -0.12120066 KIT
## [1] "MAP3K1..4214."
## NULL
##
## MAP2K1 Non
## 24 1181
## CRISPR LoF
## 1 -0.13819325 MAP2K1
## 2 -0.10729144 MAP2K1
## 3 -0.07475951 MAP2K1
## 4 -0.20591925 MAP2K1
## 5 -0.12886647 MAP2K1
## 6 -0.09137880 MAP2K1
## [1] "MTOR..2475."
## NULL
##
## MTOR Non
## 73 1132
## CRISPR LoF
## 1 -1.583739 MTOR
## 2 -1.428384 MTOR
## 3 -1.077943 MTOR
## 4 -1.295943 MTOR
## 5 -1.514911 MTOR
## 6 -1.407657 MTOR
## [1] "SMARCA2..6595."
## NULL
##
## Non SMARCA4
## 1057 148
## CRISPR LoF
## 1 -0.1365668 SMARCA4
## 2 0.2152061 SMARCA4
## 3 -1.4339477 SMARCA4
## 4 -0.4926894 SMARCA4
## 5 0.1038026 SMARCA4
## 6 -0.6954682 SMARCA4
## [1] "PIK3CA..5290."
## NULL
##
## Non PTEN
## 1064 141
## CRISPR LoF
## 1 -0.33078062 PTEN
## 2 -0.47265240 PTEN
## 3 -0.50748704 PTEN
## 4 -0.06810845 PTEN
## 5 -0.34723434 PTEN
## 6 -0.23058936 PTEN
## [1] "EIF4E..1977."
## NULL
##
## Non TSC1
## 1178 27
## CRISPR LoF
## 1 -1.565571 TSC1
## 2 -1.381869 TSC1
## 3 -2.342957 TSC1
## 4 -1.702713 TSC1
## 5 -1.944665 TSC1
## 6 -1.493313 TSC1
mike_biomarkers_mutation_nona[, 'effect_size_23Q4'] = sapply(res, function(x) x[1])
mike_biomarkers_mutation_nona[, 'FDR_23Q4'] = sapply(res, function(x) x[2])
mike_biomarkers_mutation_nona[, 'Cor_23Q4'] = sapply(res, function(x) x[4])
mike_biomarkers_mutation_nona[, 'Corpvalue_23Q4'] = sapply(res, function(x) x[3])
mike_biomarkers_mutation_nona[, 'Significant_23Q4'] <- ifelse(mike_biomarkers_mutation_nona[, 'FDR_23Q4'] < 0.05 & mike_biomarkers_mutation_nona[, 'effect_size_23Q4'] > 0, "Positive FDR < 0.05", "Not Sig")
mike_biomarkers_mutation_nona[, 'Significant_23Q4'][mike_biomarkers_mutation_nona[, 'FDR_23Q4'] < 0.05 & mike_biomarkers_mutation_nona[, 'effect_size_23Q4'] < 0] <- "Negative FDR < 0.05"
mike_biomarkers_mutation_nona[, 'CRISPR_Mutation'] = paste0(mike_biomarkers_mutation_nona$Dep_Gene, "~", mike_biomarkers_mutation_nona$Feat_gene)
p1 = ggplot(mike_biomarkers_mutation_nona, aes(x = -log10(p.value), y = -log10(Corpvalue_23Q4))) +
geom_point(size=5) + xlab("-log 10 Before 23Q2 correlation p value") + ylab("-log 10 23Q4 correlation p value") + geom_abline() + xlim(0, 250) + ylim(0, 250)
p2 = ggplot(mike_biomarkers_mutation_nona, aes(x = cor, y = Cor_23Q4)) +
geom_point(size=5) + xlab("Before 23Q2 Correlation") + ylab("23Q4 correlation") + geom_abline() + xlim(-1, 1) + ylim(-1, 1)
p1 / p2
ggplot(mike_biomarkers_mutation_nona, aes(x = effect_size_23Q4, y = -log10(FDR_23Q4))) +
geom_point(aes(color = Significant_23Q4), size=6) +
scale_color_manual(values = c("blue", "grey", "red" )) +
theme_bw(base_size = 20) + theme(legend.position = "bottom") + xlim(-1.5, 1.5) +
geom_text_repel(
data = subset(mike_biomarkers_mutation_nona, FDR_23Q4 < 0.05),
aes(label = CRISPR_Mutation),
size = 6,
box.padding = unit(1.5, "lines"),
point.padding = unit(0.5, "lines")
)
## Warning: ggrepel: 7 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
test_23Q4_ccle_maf.tnm = trinucleotideMatrix(maf = test_23Q4_ccle_maf, prefix = '', add = TRUE, ref_genome = "BSgenome.Hsapiens.UCSC.hg38")
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:NMF':
##
## nrun
## The following objects are masked from 'package:lubridate':
##
## second, second<-
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:lubridate':
##
## %within%
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:purrr':
##
## reduce
##
## Attaching package: 'XVector'
## The following object is masked from 'package:purrr':
##
## compact
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
##
## strsplit
## -Extracting 5' and 3' adjacent bases
## -Extracting +/- 20bp around mutated bases for background C>T estimation
## -Estimating APOBEC enrichment scores
## --Performing one-way Fisher's test for APOBEC enrichment
## ---APOBEC related mutations are enriched in 12.734 % of samples (APOBEC enrichment score > 2 ; 374 of 2937 samples)
## -Creating mutation matrix
## --matrix of dimension 2937x96
write.table(test_23Q4_ccle_maf.tnm$nmf_matrix, file="23Q4_nmf_maftools.txt", quote=F, sep='\t')
plotApobecDiff(tnm = test_23Q4_ccle_maf.tnm, maf = test_23Q4_ccle_maf, pVal = 0.2)
## --Possible FLAGS among top ten genes:
## TTN
## MUC16
## OBSCN
## -Processing clinical data
## --Possible FLAGS among top ten genes:
## TTN
## MUC16
## OBSCN
## -Processing clinical data
## Showing top 10 of 1624 differentially mutated genes
## $results
## Hugo_Symbol Enriched nonEnriched pval or ci.up
## 1: TP53 288 1560 6.696458e-10 2.1525833 2.8082872
## 2: PIK3CA 63 221 3.312864e-06 2.1460239 2.9284523
## 3: CDKN2A 73 279 7.625498e-06 1.9848605 2.6539443
## 4: TENM3 13 250 1.664036e-05 0.3332583 0.5886347
## 5: JARID2 4 144 2.173181e-05 0.1816654 0.4797932
## ---
## 18789: ZFAND6 1 13 1.000000e+00 0.5259730 3.5210390
## 18790: ZMYND19 1 13 1.000000e+00 0.5259730 3.5210390
## 18791: ZNF804A 24 168 1.000000e+00 0.9775562 1.5309304
## 18792: ARHGEF28 15 106 1.000000e+00 0.9684993 1.6942178
## 18793: RGSL1 15 106 1.000000e+00 0.9684993 1.6942178
## ci.low adjPval
## 1: 1.66308465 1.258465e-05
## 2: 1.55684095 3.112933e-02
## 3: 1.47183837 4.776866e-02
## 4: 0.17307258 6.696016e-02
## 5: 0.04854586 6.696016e-02
## ---
## 18789: 0.01234483 1.000000e+00
## 18790: 0.01234483 1.000000e+00
## 18791: 0.60040903 1.000000e+00
## 18792: 0.51769651 1.000000e+00
## 18793: 0.51769651 1.000000e+00
##
## $SampleSummary
## Cohort SampleSize Mean Median
## 1: Enriched 374 294.457 233.5
## 2: nonEnriched 2563 369.921 175.0
#test_23Q4_ccle_maf.tnm.sig = extractSignatures(mat = test_23Q4_ccle_maf.tnm, n = 60)
test_23Q4_ccle_maf.tnm.sig = extractSignatures(mat = test_23Q4_ccle_maf.tnm, n = 60)
## -Running NMF for factorization rank: 60
## -Finished in00:03:35 elapsed (00:02:50 cpu)
NMF_23Q4_cellline = t(test_23Q4_ccle_maf.tnm.sig$nmfObj@fit@H)
library(umap)
NMF_23Q4_cellline.umap = umap(NMF_23Q4_cellline)
colnames(NMF_23Q4_cellline.umap$layout) = c("Dim1", "Dim2")
library(reshape2)
NMF_umap_plot.data = data.frame(NMF_23Q4_cellline.umap$layout)
NMF_umap_plot.data$Celltype = metadata[match(rownames(NMF_umap_plot.data), metadata$SequencingID), 'Lineage']
ggplot(NMF_umap_plot.data)+geom_point(aes(x=Dim1, y=Dim2, colour=Celltype))
test_23Q4_ccle_maf.tnm.sig.cosm = compareSignatures(nmfRes = test_23Q4_ccle_maf.tnm.sig, sig_db = "SBS")
## -Comparing against COSMIC signatures
## ------------------------------------
## --Found Signature_1 most similar to SBS52
## Aetiology: Possible sequencing artefact [cosine-similarity: 0.939]
## --Found Signature_2 most similar to SBS31
## Aetiology: Prior chemotherapy treatment with platinum drugs [cosine-similarity: 0.847]
## --Found Signature_3 most similar to SBS60
## Aetiology: Possible sequencing artefact [cosine-similarity: 0.945]
## --Found Signature_4 most similar to SBS32
## Aetiology: Prior treatment with azathioprine to induce immunosuppression [cosine-similarity: 0.601]
## --Found Signature_5 most similar to SBS7d
## Aetiology: UV exposure [cosine-similarity: 0.901]
## --Found Signature_6 most similar to SBS41
## Aetiology: Unknown [cosine-similarity: 0.129]
## --Found Signature_7 most similar to SBS25
## Aetiology: Possible exposure to chemotherapy [cosine-similarity: 0.621]
## --Found Signature_8 most similar to SBS21
## Aetiology: DNA mismatch repair deficiency [cosine-similarity: 0.812]
## --Found Signature_9 most similar to SBS17b
## Aetiology: Unknown [cosine-similarity: 0.952]
## --Found Signature_10 most similar to SBS36
## Aetiology: Defective base excision repair due to biallelic germline or somatic MUTYH mutations [cosine-similarity: 0.26]
## --Found Signature_11 most similar to SBS10b
## Aetiology: Polymerase epsilon exonuclease domain mutations [cosine-similarity: 0.917]
## --Found Signature_12 most similar to SBS19
## Aetiology: Unknown [cosine-similarity: 0.529]
## --Found Signature_13 most similar to SBS13
## Aetiology: APOBEC Cytidine Deaminase (C>G) [cosine-similarity: 0.973]
## --Found Signature_14 most similar to SBS8
## Aetiology: Unknown [cosine-similarity: 0.139]
## --Found Signature_15 most similar to SBS53
## Aetiology: Possible sequencing artefact [cosine-similarity: 0.926]
## --Found Signature_16 most similar to SBS33
## Aetiology: Unknown [cosine-similarity: 0.892]
## --Found Signature_17 most similar to SBS24
## Aetiology: exposures to aflatoxin [cosine-similarity: 0.624]
## --Found Signature_18 most similar to SBS9
## Aetiology: defects in polymerase-eta [cosine-similarity: 0.259]
## --Found Signature_19 most similar to SBS50
## Aetiology: Possible sequencing artefact [cosine-similarity: 0.505]
## --Found Signature_20 most similar to SBS32
## Aetiology: Prior treatment with azathioprine to induce immunosuppression [cosine-similarity: 0.486]
## --Found Signature_21 most similar to SBS44
## Aetiology: Defective DNA mismatch repair [cosine-similarity: 0.408]
## --Found Signature_22 most similar to SBS46
## Aetiology: Possible sequencing artefact [cosine-similarity: 0.353]
## --Found Signature_23 most similar to SBS1
## Aetiology: spontaneous or enzymatic deamination of 5-methylcytosine [cosine-similarity: 0.757]
## --Found Signature_24 most similar to SBS39
## Aetiology: Unknown [cosine-similarity: 0.733]
## --Found Signature_25 most similar to SBS46
## Aetiology: Possible sequencing artefact [cosine-similarity: 0.27]
## --Found Signature_26 most similar to SBS55
## Aetiology: Possible sequencing artefact [cosine-similarity: 0.309]
## --Found Signature_27 most similar to SBS42
## Aetiology: Occupational exposure to haloalkanes [cosine-similarity: 0.69]
## --Found Signature_28 most similar to SBS55
## Aetiology: Possible sequencing artefact [cosine-similarity: 0.375]
## --Found Signature_29 most similar to SBS16
## Aetiology: Unknown [cosine-similarity: 0.82]
## --Found Signature_30 most similar to SBS51
## Aetiology: Possible sequencing artefact [cosine-similarity: 0.605]
## --Found Signature_31 most similar to SBS24
## Aetiology: exposures to aflatoxin [cosine-similarity: 0.322]
## --Found Signature_32 most similar to SBS54
## Aetiology: Possible sequencing artefact [cosine-similarity: 0.621]
## --Found Signature_33 most similar to SBS44
## Aetiology: Defective DNA mismatch repair [cosine-similarity: 0.433]
## --Found Signature_34 most similar to SBS4
## Aetiology: exposure to tobacco (smoking) mutagens [cosine-similarity: 0.429]
## --Found Signature_35 most similar to SBS50
## Aetiology: Possible sequencing artefact [cosine-similarity: 0.331]
## --Found Signature_36 most similar to SBS47
## Aetiology: Possible sequencing artefact [cosine-similarity: 0.33]
## --Found Signature_37 most similar to SBS7c
## Aetiology: UV exposure [cosine-similarity: 0.896]
## --Found Signature_38 most similar to SBS28
## Aetiology: Unknown [cosine-similarity: 0.973]
## --Found Signature_39 most similar to SBS48
## Aetiology: Possible sequencing artefact [cosine-similarity: 0.966]
## --Found Signature_40 most similar to SBS84
## Aetiology: Activity of activation-induced cytidine deaminase (AID) [cosine-similarity: 0.28]
## --Found Signature_41 most similar to SBS22
## Aetiology: exposure to aristolochic acid [cosine-similarity: 0.817]
## --Found Signature_42 most similar to SBS22
## Aetiology: exposure to aristolochic acid [cosine-similarity: 0.201]
## --Found Signature_43 most similar to SBS39
## Aetiology: Unknown [cosine-similarity: 0.095]
## --Found Signature_44 most similar to SBS16
## Aetiology: Unknown [cosine-similarity: 0.454]
## --Found Signature_45 most similar to SBS7a
## Aetiology: UV exposure [cosine-similarity: 0.754]
## --Found Signature_46 most similar to SBS39
## Aetiology: Unknown [cosine-similarity: 0.179]
## --Found Signature_47 most similar to SBS49
## Aetiology: Possible sequencing artefact [cosine-similarity: 0.904]
## --Found Signature_48 most similar to SBS84
## Aetiology: Activity of activation-induced cytidine deaminase (AID) [cosine-similarity: 0.673]
## --Found Signature_49 most similar to SBS34
## Aetiology: Unknown [cosine-similarity: 0.399]
## --Found Signature_50 most similar to SBS17a
## Aetiology: Unknown [cosine-similarity: 0.965]
## --Found Signature_51 most similar to SBS53
## Aetiology: Possible sequencing artefact [cosine-similarity: 0.233]
## --Found Signature_52 most similar to SBS18
## Aetiology: Possibly damage by reactive oxygen species [cosine-similarity: 0.48]
## --Found Signature_53 most similar to SBS54
## Aetiology: Possible sequencing artefact [cosine-similarity: 0.643]
## --Found Signature_54 most similar to SBS20
## Aetiology: Concurrent POLD1 mutations and defective DNA mismatch repair [cosine-similarity: 0.85]
## --Found Signature_55 most similar to SBS2
## Aetiology: APOBEC Cytidine Deaminase (C>T) [cosine-similarity: 0.86]
## --Found Signature_56 most similar to SBS58
## Aetiology: Possible sequencing artefact [cosine-similarity: 0.747]
## --Found Signature_57 most similar to SBS59
## Aetiology: Possible sequencing artefact [cosine-similarity: 0.946]
## --Found Signature_58 most similar to SBS10a
## Aetiology: Polymerase epsilon exonuclease domain mutations [cosine-similarity: 0.987]
## --Found Signature_59 most similar to SBS1
## Aetiology: spontaneous or enzymatic deamination of 5-methylcytosine [cosine-similarity: 0.407]
## --Found Signature_60 most similar to SBS15
## Aetiology: Defective DNA mismatch repair [cosine-similarity: 0.811]
## ------------------------------------
pheatmap(mat = test_23Q4_ccle_maf.tnm.sig.cosm$cosine_similarities, cluster_rows = FALSE, main = "cosine similarity against validated signatures", width=20)
sig.cosine.vector = sapply(test_23Q4_ccle_maf.tnm.sig.cosm$best_match, function(x) x$best_match)
sig.cosine.vector = as.numeric(gsub("Best match: SBS.* \\[cosine-similarity: ([0-9].[0-9]*)\\]", "\\1", sig.cosine.vector))
names(sig.cosine.vector) = sapply(test_23Q4_ccle_maf.tnm.sig.cosm$best_match, function(x) x$aetiology)
sig.cosine.df = data.frame(rank=rank(sig.cosine.vector), cosine_similarity=sig.cosine.vector, aetiology=sapply(test_23Q4_ccle_maf.tnm.sig.cosm$best_match, function(x) x$aetiology))
sig.cosine.df = sig.cosine.df %>%
group_by(aetiology) %>%
summarise(Cosine=mean(cosine_similarity, na.rm=T)) %>%
arrange(desc(Cosine))
sig.cosine.df$rank = rank(sig.cosine.df$Cosine)
# in total 60 NMF signatures, aggregates to 21 by mean cosine similarity
ggplot(sig.cosine.df, aes(x=rank, y=Cosine, colour=aetiology))+geom_point()+
geom_label_repel(aes(label = aetiology,
fill = factor(aetiology)), color = 'white',
size = 3.5) +
theme(legend.position = "none", panel.spacing.x = unit(0, "mm"), text = element_text(size=16))
hist(c(test_23Q4_ccle_maf.tnm.sig$contributions))
sig.cutoff = 0.01
sig.freq.samples = t(test_23Q4_ccle_maf.tnm.sig$contributions>sig.cutoff)
sig.freq.samples.lineages = metadata[match(rownames(sig.freq.samples), metadata$SequencingID), 'Lineage']
lineages <- c()
sig.freq.list <- list()
for (lineage in unique(sig.freq.samples.lineages)) {
if (length(dim(sig.freq.samples[sig.freq.samples.lineages==lineage,]))!=0) {
sig.freq.samples.each = sig.freq.samples[sig.freq.samples.lineages==lineage,]
print(dim(sig.freq.samples.each))
sig.freq.list[[lineage]] <- colSums(sig.freq.samples.each)
lineages <- c(lineage, lineages)
}
}
## [1] 47 60
## [1] 178 60
## [1] 97 60
## [1] 77 60
## [1] 413 60
## [1] 130 60
## [1] 28 60
## [1] 52 60
## [1] 63 60
## [1] 191 60
## [1] 47 60
## [1] 115 60
## [1] 111 60
## [1] 193 60
## [1] 211 60
## [1] 96 60
## [1] 70 60
## [1] 136 60
## [1] 129 60
## [1] 173 60
## [1] 34 60
## [1] 41 60
## [1] 97 60
## [1] 88 60
## [1] 14 60
## [1] 30 60
## [1] 25 60
## [1] 4 60
## [1] 28 60
## [1] 6 60
## [1] 3 60
## [1] 5 60
## [1] 2 60
sig.freq.mat = Reduce(rbind, sig.freq.list)
rownames(sig.freq.mat) = lineages
colnames(sig.freq.mat) = sapply(test_23Q4_ccle_maf.tnm.sig.cosm$best_match, function(x) x$aetiology)
pheatmap(mat = sig.freq.mat, cluster_rows = FALSE, cluster_cols = FALSE, main = "Mutation signatures in different lineages", width=20, color = colorRampPalette(c("white", "red"))(30))
LUAD_maf.tnm = trinucleotideMatrix(maf = LUAD_maf, prefix = '', add = TRUE, ref_genome = "BSgenome.Hsapiens.UCSC.hg38")
## -Extracting 5' and 3' adjacent bases
## -Extracting +/- 20bp around mutated bases for background C>T estimation
## -Estimating APOBEC enrichment scores
## --Performing one-way Fisher's test for APOBEC enrichment
## ---APOBEC related mutations are enriched in 24.59 % of samples (APOBEC enrichment score > 2 ; 30 of 122 samples)
## -Creating mutation matrix
## --matrix of dimension 122x96
#LUAD_maf.sign = estimateSignatures(mat = LUAD_maf.tnm, nTry = 5)
#plotCophenetic(res = LUAD_maf.sign)
LUAD_maf.tnm.sig = extractSignatures(mat = LUAD_maf.tnm, n = 2)
## -Running NMF for factorization rank: 2
## -Finished in3.271s elapsed (2.493s cpu)
plotSignatures(nmfRes = LUAD_maf.tnm.sig, title_size = 1.5,
ig_db = "SBS", axis_lwd=2)
## Warning in plot.window(xlim, ylim, log = log, ...): "ig_db" is not a graphical
## parameter
## Warning in axis(if (horiz) 2 else 1, at = at.l, labels = names.arg, lty =
## axis.lty, : "ig_db" is not a graphical parameter
## Warning in title(main = main, sub = sub, xlab = xlab, ylab = ylab, ...):
## "ig_db" is not a graphical parameter
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): "ig_db" is not
## a graphical parameter
## Warning in plot.window(xlim, ylim, log = log, ...): "ig_db" is not a graphical
## parameter
## Warning in axis(if (horiz) 2 else 1, at = at.l, labels = names.arg, lty =
## axis.lty, : "ig_db" is not a graphical parameter
## Warning in title(main = main, sub = sub, xlab = xlab, ylab = ylab, ...):
## "ig_db" is not a graphical parameter
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): "ig_db" is not
## a graphical parameter
SKCM_maf.tnm = trinucleotideMatrix(maf = SKCM_maf, prefix = '', add = TRUE, ref_genome = "BSgenome.Hsapiens.UCSC.hg38")
## -Extracting 5' and 3' adjacent bases
## -Extracting +/- 20bp around mutated bases for background C>T estimation
## -Estimating APOBEC enrichment scores
## --Performing one-way Fisher's test for APOBEC enrichment
## ---APOBEC related mutations are enriched in 0 % of samples (APOBEC enrichment score > 2 ; 0 of 24 samples)
## -Creating mutation matrix
## --matrix of dimension 24x96
#SKCM_maf.sign = estimateSignatures(mat = SKCM_maf.tnm, nTry = 5)
#plotCophenetic(res = SKCM_maf.sign)
SKCM_maf.tnm.sig = extractSignatures(mat = SKCM_maf.tnm, n = 2)
## -Running NMF for factorization rank: 2
## -Finished in1.154s elapsed (1.117s cpu)
plotSignatures(nmfRes = SKCM_maf.tnm.sig, title_size = 1.5, ig_db = "SBS", axis_lwd=2)
## Warning in plot.window(xlim, ylim, log = log, ...): "ig_db" is not a graphical
## parameter
## Warning in axis(if (horiz) 2 else 1, at = at.l, labels = names.arg, lty =
## axis.lty, : "ig_db" is not a graphical parameter
## Warning in title(main = main, sub = sub, xlab = xlab, ylab = ylab, ...):
## "ig_db" is not a graphical parameter
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): "ig_db" is not
## a graphical parameter
## Warning in plot.window(xlim, ylim, log = log, ...): "ig_db" is not a graphical
## parameter
## Warning in axis(if (horiz) 2 else 1, at = at.l, labels = names.arg, lty =
## axis.lty, : "ig_db" is not a graphical parameter
## Warning in title(main = main, sub = sub, xlab = xlab, ylab = ylab, ...):
## "ig_db" is not a graphical parameter
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): "ig_db" is not
## a graphical parameter